
library(matchMulti)
library(dplyr)
library(ggplot2)

# Load Data
# The workspace is intentionally NOT cleared with rm(list = ls()): nothing in
# this script depends on a clean global environment, and wiping it would
# silently destroy whatever the caller already had loaded.
data(catholic_schools)

# Labelled factor version of `sector`. The labels are applied to the sorted
# unique values of `sector`, so the lower value becomes "Public" and the higher
# one "Catholic" (presumably 0 = public, 1 = Catholic; confirm against the
# data documentation). `labels` is spelled out in full rather than relying on
# partial argument matching.
catholic_schools$sectorf <- factor(catholic_schools$sector,
                                   labels = c("Public", "Catholic"))

# Covariate Lists
# Student-level covariates (passed to the matching calls as `student.vars`).
student.cov <- c("minority", "female", "ses")

# School-level covariates.
school.cov <- c(
  "minority_mean", "female_mean", "size",
  "acad", "discrm", "ses_mean"
)

# Every covariate: the student-level names followed by the school-level names.
all.cov <- c(student.cov, school.cov)


# Balance Before Trimming
# Columns handed to balanceTable(): every covariate plus the treatment
# indicator `sector`.
balance_cols <- c(all.cov, 'sector')
bal_unmatched <- balanceTable(catholic_schools[balance_cols],
                              treatment = 'sector')
bal_unmatched

# Remove Non-Coed Schools
# Keep only rows whose `female_mean` lies strictly between 0.30 and 0.75
# (presumably the school-level share of female students; confirm).
catholic_schools <- dplyr::filter(
  catholic_schools,
  female_mean > 0.30 & female_mean < 0.75
)

# Balance After Trimming
# Same table as above, recomputed on the trimmed data.
bal_trimmed <- balanceTable(catholic_schools[balance_cols],
                            treatment = 'sector')
bal_trimmed

#####################################################################
##                                 Matching                        ##
#####################################################################

# Create Discrete Measures from Continuous Covariates for Fine Balance
# Mapping of new two-level factor column (name) -> continuous source column
# (value). Each source is split into 2 intervals with cut(); the new columns
# are appended in the order listed here.
cut_sources <- c(
  acad_cut   = 'acad',
  size_cut   = 'size',
  discrm_cut = 'discrm',
  ses_cut    = 'ses_mean',
  fem_cut    = 'female_mean',
  min_cut    = 'minority_mean'
)
catholic_schools[names(cut_sources)] <- lapply(
  catholic_schools[unname(cut_sources)],
  cut,
  breaks = 2
)

# First Match
# Baseline match with `sector` as treatment and `school` as the school
# identifier. Student-level matching is switched off (match.students = FALSE);
# the student covariates are still supplied through `student.vars`.
match_1 <- matchMulti(
  catholic_schools,
  treatment      = 'sector',
  school.id      = 'school',
  student.vars   = student.cov,
  match.students = FALSE,
  verbose        = TRUE
)


# Now Prioritize Acad and SES
# NOTE(review): the previous comment here read "Acad and Discrm", but the
# first fine-balance level actually contains acad_cut and ses_cut; confirm
# which pair was intended.
# The fine-balance levels are nested: l2 is l1 plus size_cut.
l1 <- c("acad_cut", "ses_cut")
l2 <- c(l1, "size_cut")

# Re-match the schools from match_1 using the nested levels above.
match_2 <- rematchSchools(
  match_1, catholic_schools,
  school.fb = list(l1, l2),
  tol = 100
)

# More Complex Balance Structure
# Three nested fine-balance levels, each containing the previous one:
#   l1: discrm_cut
#   l2: l1 + acad_cut, size_cut, ses_cut
#   l3: l2 + fem_cut, min_cut
# These overwrite the l1/l2 used for match_2 and are reused by later matches.
l1 <- "discrm_cut"
l2 <- c(l1, "acad_cut", "size_cut", "ses_cut")
l3 <- c(l2, "fem_cut", "min_cut")

# Re-match the schools from match_1 with the three-level structure.
match_3 <- rematchSchools(
  match_1, catholic_schools,
  school.fb = list(l1, l2, l3),
  tol = 100
)


# Trim Schools
# Fresh match using the three nested fine-balance levels (l1, l2, l3) with
# keep.target = 20 (presumably the number of treated schools to retain;
# confirm against the matchMulti documentation).
match_4 <- matchMulti(
  catholic_schools,
  treatment      = 'sector',
  school.id      = 'school',
  student.vars   = student.cov,
  school.fb      = list(l1, l2, l3),
  keep.target    = 20,
  tol            = 100,
  match.students = FALSE,
  verbose        = TRUE
)

# Summarise counts in the matched data by treatment and school.
describe_data_counts(
  match_4$matched,
  school.id = "school",
  treatment = "sector"
)
                      
                                                                         
# Try 10 Schools
# Same specification as the keep.target = 20 match, but with keep.target = 10.
match_5 <- matchMulti(
  catholic_schools,
  treatment      = 'sector',
  school.id      = 'school',
  student.vars   = student.cov,
  school.fb      = list(l1, l2, l3),
  keep.target    = 10,
  tol            = 100,
  match.students = FALSE,
  verbose        = TRUE
)

# Summarise counts in the matched data by treatment and school.
describe_data_counts(
  match_5$matched,
  school.id = "school",
  treatment = "sector"
)

# Save the balance tables and every match object to a single .RData file.
# The destination is built with file.path() rather than by calling setwd(), so
# running the script no longer changes the session's working directory.
out_dir <- "~/Dropbox/Group Matches/Analysis/Catholic/ReplicationFiles/R output"
save(bal_unmatched, bal_trimmed,
     match_1, match_2, match_3, match_4, match_5,
     file = file.path(out_dir, "cschools_matchout.RData"))







